{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 引用了[这里](https://github.com/pytorch/examples/tree/master/word_language_model )的部分代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import numpy as np\n",
    "from torch.nn.utils import clip_grad_norm\n",
    "from data_utils import Dictionary, Corpus#Corpus制作单词对应索引【唯一】，通过w2id->id2w【id2w不唯一】，return ids.view(batch_size, -1)即为输入数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 配置GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n"
     ]
    }
   ],
   "source": [
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "print(device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 超参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "embed_size = 128\n",
    "hidden_size = 1024\n",
    "num_layers = 1\n",
    "num_epochs = 5\n",
    "num_samples = 1000 #要采样的单词数目\n",
    "batch_size = 20\n",
    "seq_length = 30\n",
    "learning_rate = 0.002"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 加载宾州树库（Penn Treebank）数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus = Corpus()\n",
    "ids = corpus.get_data('data/train.txt', batch_size)\n",
    "vocab_size = len(corpus.dictionary)\n",
    "num_batches = ids.size(1) // seq_length"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基于RNN的语言模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 输入：x, (h, c) 我认为c也是有的，参照训练时给出的input···；x形状ids[:, i:i+seq_length]，对应下面的形状，这里seq_length既是batch_size~\n",
    "# 输出：out, (h, c)， out为所有时间步的隐藏层状态，形状【num_layers, batch_size, hidden_size】\n",
    "# loss： out 和 target(input的后移一个单词)，形状同上。target也可以表述为ids[:, (i+1):(i+1)+seq_length]\n",
    "class RNNLM(nn.Module):\n",
    "    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):\n",
    "        super(RNNLM, self).__init__()\n",
    "        self.embed = nn.Embedding(vocab_size, embed_size)\n",
    "        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)\n",
    "        self.linear = nn.Linear(hidden_size, vocab_size)\n",
    "    \n",
    "    def forward(self, x, h):\n",
    "        # 嵌入 单词索引 成 向量\n",
    "        x = self.embed(x)\n",
    "        # 前向传播LSTM\n",
    "        out, (h, c) = self.lstm(x, h)\n",
    "        # 将输出形状转变为 (batch_size * sequence_length, hidden_size)\n",
    "        out = out.reshape(out.size(0)*out.size(1), out.size(2))\n",
    "        # 解码所有时间步的隐藏层状态\n",
    "        out = self.linear(out)\n",
    "        return out, (h, c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = RNNLM(vocab_size, embed_size, hidden_size, num_layers).to(device)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 损失 和 优化 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Truncated backpropagation截断反向传播\n",
    "# detach 就是截断反向传播的梯度流\n",
    "def detach(states):\n",
    "    return [state.detach() for state in states]\n",
    "#对于输入序列很长的问题，BPTT可能很难训练复现神经网络。除了速度之外，在很多时间步长上累积梯度可能导致值缩小到零，或者最终溢出或爆炸的值增长。\n",
    "#BPTT的修改是限制在向后传递上使用的时间步数，并且实际上估计用于更新权重的梯度而不是完全计算它。\n",
    "#这种变化称为截断反向传播时间或TBPTT。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "F:\\Anaconda\\envs\\tensorflow-gpu\\lib\\site-packages\\ipykernel_launcher.py:18: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [1/5], Step[0/1549], Loss: 9.0049, Perplexity: 8142.91\n",
      "Epoch [1/5], Step[100/1549], Loss: 6.0402, Perplexity: 419.98\n",
      "Epoch [1/5], Step[200/1549], Loss: 5.9137, Perplexity: 370.06\n",
      "Epoch [1/5], Step[300/1549], Loss: 5.7529, Perplexity: 315.10\n",
      "Epoch [1/5], Step[400/1549], Loss: 5.6651, Perplexity: 288.61\n",
      "Epoch [1/5], Step[500/1549], Loss: 5.1396, Perplexity: 170.64\n",
      "Epoch [1/5], Step[600/1549], Loss: 5.2108, Perplexity: 183.24\n",
      "Epoch [1/5], Step[700/1549], Loss: 5.3828, Perplexity: 217.64\n",
      "Epoch [1/5], Step[800/1549], Loss: 5.1845, Perplexity: 178.48\n",
      "Epoch [1/5], Step[900/1549], Loss: 5.0508, Perplexity: 156.15\n",
      "Epoch [1/5], Step[1000/1549], Loss: 5.1414, Perplexity: 170.95\n",
      "Epoch [1/5], Step[1100/1549], Loss: 5.2922, Perplexity: 198.77\n",
      "Epoch [1/5], Step[1200/1549], Loss: 5.1779, Perplexity: 177.31\n",
      "Epoch [1/5], Step[1300/1549], Loss: 5.0767, Perplexity: 160.25\n",
      "Epoch [1/5], Step[1400/1549], Loss: 4.8640, Perplexity: 129.54\n",
      "Epoch [1/5], Step[1500/1549], Loss: 5.1361, Perplexity: 170.05\n",
      "Epoch [2/5], Step[0/1549], Loss: 5.3772, Perplexity: 216.42\n",
      "Epoch [2/5], Step[100/1549], Loss: 4.5647, Perplexity: 96.03\n",
      "Epoch [2/5], Step[200/1549], Loss: 4.6863, Perplexity: 108.45\n",
      "Epoch [2/5], Step[300/1549], Loss: 4.6517, Perplexity: 104.76\n",
      "Epoch [2/5], Step[400/1549], Loss: 4.5128, Perplexity: 91.18\n",
      "Epoch [2/5], Step[500/1549], Loss: 4.1190, Perplexity: 61.50\n",
      "Epoch [2/5], Step[600/1549], Loss: 4.4773, Perplexity: 88.00\n",
      "Epoch [2/5], Step[700/1549], Loss: 4.4152, Perplexity: 82.70\n",
      "Epoch [2/5], Step[800/1549], Loss: 4.4670, Perplexity: 87.09\n",
      "Epoch [2/5], Step[900/1549], Loss: 4.1684, Perplexity: 64.61\n",
      "Epoch [2/5], Step[1000/1549], Loss: 4.3431, Perplexity: 76.94\n",
      "Epoch [2/5], Step[1100/1549], Loss: 4.4994, Perplexity: 89.97\n",
      "Epoch [2/5], Step[1200/1549], Loss: 4.4498, Perplexity: 85.61\n",
      "Epoch [2/5], Step[1300/1549], Loss: 4.1828, Perplexity: 65.55\n",
      "Epoch [2/5], Step[1400/1549], Loss: 3.9529, Perplexity: 52.09\n",
      "Epoch [2/5], Step[1500/1549], Loss: 4.2864, Perplexity: 72.70\n",
      "Epoch [3/5], Step[0/1549], Loss: 4.4303, Perplexity: 83.96\n",
      "Epoch [3/5], Step[100/1549], Loss: 3.8548, Perplexity: 47.22\n",
      "Epoch [3/5], Step[200/1549], Loss: 4.0469, Perplexity: 57.22\n",
      "Epoch [3/5], Step[300/1549], Loss: 3.8840, Perplexity: 48.62\n",
      "Epoch [3/5], Step[400/1549], Loss: 3.8002, Perplexity: 44.71\n",
      "Epoch [3/5], Step[500/1549], Loss: 3.4586, Perplexity: 31.77\n",
      "Epoch [3/5], Step[600/1549], Loss: 3.8357, Perplexity: 46.33\n",
      "Epoch [3/5], Step[700/1549], Loss: 3.7374, Perplexity: 41.99\n",
      "Epoch [3/5], Step[800/1549], Loss: 3.8159, Perplexity: 45.42\n",
      "Epoch [3/5], Step[900/1549], Loss: 3.4448, Perplexity: 31.34\n",
      "Epoch [3/5], Step[1000/1549], Loss: 3.5992, Perplexity: 36.57\n",
      "Epoch [3/5], Step[1100/1549], Loss: 3.7366, Perplexity: 41.95\n",
      "Epoch [3/5], Step[1200/1549], Loss: 3.7786, Perplexity: 43.75\n",
      "Epoch [3/5], Step[1300/1549], Loss: 3.4378, Perplexity: 31.12\n",
      "Epoch [3/5], Step[1400/1549], Loss: 3.2178, Perplexity: 24.97\n",
      "Epoch [3/5], Step[1500/1549], Loss: 3.6239, Perplexity: 37.48\n",
      "Epoch [4/5], Step[0/1549], Loss: 3.5099, Perplexity: 33.45\n",
      "Epoch [4/5], Step[100/1549], Loss: 3.3471, Perplexity: 28.42\n",
      "Epoch [4/5], Step[200/1549], Loss: 3.5657, Perplexity: 35.36\n",
      "Epoch [4/5], Step[300/1549], Loss: 3.3045, Perplexity: 27.24\n",
      "Epoch [4/5], Step[400/1549], Loss: 3.3186, Perplexity: 27.62\n",
      "Epoch [4/5], Step[500/1549], Loss: 2.9460, Perplexity: 19.03\n",
      "Epoch [4/5], Step[600/1549], Loss: 3.3439, Perplexity: 28.33\n",
      "Epoch [4/5], Step[700/1549], Loss: 3.2306, Perplexity: 25.29\n",
      "Epoch [4/5], Step[800/1549], Loss: 3.2816, Perplexity: 26.62\n",
      "Epoch [4/5], Step[900/1549], Loss: 2.9673, Perplexity: 19.44\n",
      "Epoch [4/5], Step[1000/1549], Loss: 3.1314, Perplexity: 22.91\n",
      "Epoch [4/5], Step[1100/1549], Loss: 3.1693, Perplexity: 23.79\n",
      "Epoch [4/5], Step[1200/1549], Loss: 3.3079, Perplexity: 27.33\n",
      "Epoch [4/5], Step[1300/1549], Loss: 2.9390, Perplexity: 18.90\n",
      "Epoch [4/5], Step[1400/1549], Loss: 2.7251, Perplexity: 15.26\n",
      "Epoch [4/5], Step[1500/1549], Loss: 3.1901, Perplexity: 24.29\n",
      "Epoch [5/5], Step[0/1549], Loss: 2.9890, Perplexity: 19.87\n",
      "Epoch [5/5], Step[100/1549], Loss: 3.0502, Perplexity: 21.12\n",
      "Epoch [5/5], Step[200/1549], Loss: 3.0938, Perplexity: 22.06\n",
      "Epoch [5/5], Step[300/1549], Loss: 3.0216, Perplexity: 20.52\n",
      "Epoch [5/5], Step[400/1549], Loss: 2.9911, Perplexity: 19.91\n",
      "Epoch [5/5], Step[500/1549], Loss: 2.6446, Perplexity: 14.08\n",
      "Epoch [5/5], Step[600/1549], Loss: 3.0355, Perplexity: 20.81\n",
      "Epoch [5/5], Step[700/1549], Loss: 2.9438, Perplexity: 18.99\n",
      "Epoch [5/5], Step[800/1549], Loss: 3.0309, Perplexity: 20.72\n",
      "Epoch [5/5], Step[900/1549], Loss: 2.6664, Perplexity: 14.39\n",
      "Epoch [5/5], Step[1000/1549], Loss: 2.8290, Perplexity: 16.93\n",
      "Epoch [5/5], Step[1100/1549], Loss: 2.8194, Perplexity: 16.77\n",
      "Epoch [5/5], Step[1200/1549], Loss: 3.0170, Perplexity: 20.43\n",
      "Epoch [5/5], Step[1300/1549], Loss: 2.6123, Perplexity: 13.63\n",
      "Epoch [5/5], Step[1400/1549], Loss: 2.4335, Perplexity: 11.40\n",
      "Epoch [5/5], Step[1500/1549], Loss: 2.8403, Perplexity: 17.12\n"
     ]
    }
   ],
   "source": [
    "for epoch in range(num_epochs):\n",
    "    # 设置隐藏层和神经元状态\n",
    "    states = (torch.zeros(num_layers, batch_size, hidden_size).to(device),\n",
    "             torch.zeros(num_layers, batch_size, hidden_size).to(device))\n",
    "    \n",
    "    for i in range(0, ids.size(1) - seq_length, seq_length):\n",
    "        # 获得 迷你-batch 输入 和 目标\n",
    "        inputs = ids[:, i:i+seq_length].to(device)\n",
    "        targets = ids[:, (i+1):(i+1)+seq_length].to(device)\n",
    "        # 前向传播\n",
    "        states = detach(states)\n",
    "        outputs, states = model(inputs, states)\n",
    "        loss = criterion(outputs, targets.reshape(-1))\n",
    "        \n",
    "        # 反向传播和优化\n",
    "        model.zero_grad()\n",
    "        loss.backward()\n",
    "        clip_grad_norm(model.parameters(), 0.5)# 个人将它理解为神经网络训练时候的drop out的方法，用于解决神经网络训练过拟合的方法\n",
    "        optimizer.step()\n",
    "        \n",
    "        step = (i+1) // seq_length\n",
    "        if step % 100 == 0:\n",
    "            print('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}, Perplexity: {:5.2f}'\n",
    "                   .format(epoch+1, num_epochs, step, num_batches, loss.item(), np.exp(loss.item())))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 测试模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sampled [100/1000] words and save to sample.txt\n",
      "Sampled [200/1000] words and save to sample.txt\n",
      "Sampled [300/1000] words and save to sample.txt\n",
      "Sampled [400/1000] words and save to sample.txt\n",
      "Sampled [500/1000] words and save to sample.txt\n",
      "Sampled [600/1000] words and save to sample.txt\n",
      "Sampled [700/1000] words and save to sample.txt\n",
      "Sampled [800/1000] words and save to sample.txt\n",
      "Sampled [900/1000] words and save to sample.txt\n",
      "Sampled [1000/1000] words and save to sample.txt\n"
     ]
    }
   ],
   "source": [
    "with torch.no_grad():\n",
    "    with open('sample.txt', 'w') as f:\n",
    "        # 初始化隐藏层和神经元\n",
    "        state = (torch.zeros(num_layers, 1, hidden_size).to(device),\n",
    "                torch.zeros(num_layers, 1, hidden_size).to(device))\n",
    "        # 随机选取一个单词的索引\n",
    "        prob = torch.ones(vocab_size)\n",
    "        input_ = torch.multinomial(prob, num_samples=1).unsqueeze(1).to(device)\n",
    "        #.multinomial作用是对input的每一行做n_samples次取值，输出的张量是每一次取值时input张量对应行的下标,每个样本被采样的概率由logits给出\n",
    "        #.unsqueeze(1)作用是维度扩充\n",
    "        for i in range(num_samples):\n",
    "            # 前向传播RNN\n",
    "            output, state = model(input_, state)\n",
    "            # 从输出采样出一个单词索引(概率遵循logits)\n",
    "            prob = output.exp()\n",
    "            word_id = torch.multinomial(prob, num_samples=1).item()\n",
    "            #用采样的单词id填充输入，以备下一个步骤使用\n",
    "            input_.fill_(word_id)\n",
    "            #写入文件\n",
    "            word = corpus.dictionary.idx2word[word_id]\n",
    "            word = '\\n' if word == '<eos>' else word + ' '    #EOS(end of sentence)\n",
    "            f.write(word)\n",
    "            \n",
    "            if (i+1) % 100 == 0:\n",
    "                print('Sampled [{}/{}] words and save to {}'.format(i+1, num_samples, 'sample.txt'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 保存模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(model.state_dict(), 'model.ckpt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tensorflow-gpu",
   "language": "python",
   "name": "tensorflow-gpu"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
